The PGC3 MDD study is including the following analyses to identify genes associated with MDD:


Read in results from all analyses

Show code

##########
# Nearest gene
##########

library(data.table)

# Read in GWAS results
# Currently we are using results only for lead indepdendant associations from COJO
# I think this is fine
indep_hits<-fread('../../docs/tables/meta_snps_full_eur.cojo.txt')

# Link SNPs to nearest gene
# Insert nearest gene information
library(biomaRt)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
library(GenomicRanges)
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:data.table':
## 
##     first, second
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:data.table':
## 
##     shift
## Loading required package: GenomeInfoDb
gene_attributes = c('ensembl_gene_id', 'hgnc_symbol', 'external_gene_name','chromosome_name','start_position','end_position')
# GRCh37 for position
ensembl37 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl", GRCh=37)
Genes37<-getBM(attributes=gene_attributes, mart = ensembl37)
# remove alternative contigs
Genes37 <- Genes37[Genes37$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes37$cpid <- with(Genes37, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes37 <- Genes37[!duplicated(Genes37$ensembl_gene_id),]
Genes37 <- Genes37[!duplicated(Genes37$cpid),]
Genes37 <- Genes37[!duplicated(Genes37$external_gene_name),]

# GRCH38 for gene names
ensembl38 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl")
Genes38<-getBM(attributes=gene_attributes, mart = ensembl38)
# remove alternative contigs
Genes38 <- Genes38[Genes38$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes38$cpid <- with(Genes38, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes38 <- Genes38[!duplicated(Genes38$ensembl_gene_id),]
Genes38 <- Genes38[!duplicated(Genes38$cpid),]
Genes38 <- Genes38[!duplicated(Genes38$external_gene_name),]

# 37 positions with 38 gene names
Genes <- merge(Genes37, Genes38[,c('ensembl_gene_id', 'chromosome_name', 'external_gene_name', 'hgnc_symbol')], by=c('ensembl_gene_id', 'chromosome_name'), all.x=TRUE, suffix=c('.37', '.38'))


# copy over build 37 gene name if it is missing in 38
coalesce_gene_name <- which(is.na(Genes$external_gene_name.38))
Genes$external_gene_name = with(Genes, ifelse(is.na(external_gene_name.38) | external_gene_name.38 == "", yes=external_gene_name.37, no=external_gene_name.38))

window<-50000

for(i in 1:nrow(indep_hits)){
  Genes_i<-Genes[Genes$start_position < (indep_hits$BP[i] + window) & Genes$end_position > (indep_hits$BP[i] - window) & Genes$chromosome_name == indep_hits$CHR[i],]
  if(nrow(Genes_i) != 0){
    gene_string<-NULL
    for(j in 1:nrow(Genes_i)){
      if(indep_hits$BP[i] > Genes_i$start_position[j] & indep_hits$BP[i] < Genes_i$end_position[j]){
        gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
                                                   ENSGID=Genes_i$ensembl_gene_id[j],
                                                   Dist=0,
                                                   Pos=NA))
      }
      if(indep_hits$BP[i] < Genes_i$start_position[j]){
        gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
                                                   ENSGID=Genes_i$ensembl_gene_id[j],
                                                   Dist=abs(indep_hits$BP[i] - Genes_i$start_position[j]),
                                                   Pos='-'))
      }
      if(indep_hits$BP[i] > Genes_i$end_position[j]){
        gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
                                                   ENSGID=Genes_i$ensembl_gene_id[j],
                                                   Dist=abs(indep_hits$BP[i] - Genes_i$end_position[j]),
                                                   Pos='+'))
      }
    }
    gene_string<-gene_string[order(gene_string$Dist),]
    indep_hits$NearestGene[i]<-as.character(gene_string$ID[1])
    indep_hits$NearestENSG[i]<-as.character(gene_string$ENSGID[1])
  } else {
    indep_hits$NearestGene[i]<-NA
    indep_hits$NearestENSG[i]<-NA
  }
}

##########
# SNP-finemapping
##########

# Read in finemapping results from Joni. This table shows genes implicated by the finemapping results, by a gene containing the entire 95% credible set.
finemap<-fread('../../docs/tables/finemapping/Locus_FineMapping_Full_Results.csv')

# parse out gene names
finemap_genes<-unlist(strsplit(finemap$High.Confidence.Genes.Names, ','))
finemap_genes<-finemap_genes[finemap_genes != '-']
# parse out ensembl ids
finemap_geneids<-unlist(strsplit(finemap$High.Confidence.Genes.IDs, ','))
finemap_geneids<-finemap_geneids[finemap_geneids != '-']
finemap_geneids <- sapply(strsplit(finemap_geneids, '\\.'), function(g) g[[1]])


##########
# FastBAT
##########

# Read in FastBAT results
fastbat<-fread('../../docs/tables/fastBAT/mdd_fastbat_AllgeneMatrix.gene.fastbat')
fastbat$P.FDR<-p.adjust(fastbat$Pvalue, method='fdr')

##########
# H-MAGMA
##########

hmagma<-fread('../../docs/tables/H-MAGMA/PGC_MDD_Results_mar2022.csv')

# Exclude astrocytes
hmagma_noAstr<-hmagma[hmagma$Analysis != 'Astrocytes',]

# Apply FDR correction across all tests
hmagma_noAstr$P.FDR<-p.adjust(hmagma_noAstr$P, method = 'fdr')

hmagma_noAstr<-merge(hmagma_noAstr, Genes, by.x='GENE', by.y='ensembl_gene_id')

##########
# TWAS
##########

twas<-fread('../../docs/tables/twas/PGC_MDD3_twas_AllTissues_GW.txt')
twas$chromosome_name <- as.character(twas$CHR)
twas$twas_id <- 1:nrow(twas)

# merge by scaffold (exact overlap)
twas_gr <- with(twas, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
genes_gr <- with(Genes, GRanges(seqnames=chromosome_name, ranges=IRanges(start=start_position, end=end_position)))

twas_genes_overlaps <- findOverlaps(twas_gr, genes_gr, type='equal')

twas_scaffolds <- twas[twas_genes_overlaps@from,]
twas_scaffolds$ensembl_gene_id <- Genes$ensembl_gene_id[twas_genes_overlaps@to]
twas_scaffolds$external_gene_name.37 <- Genes$external_gene_name.37[twas_genes_overlaps@to]

# merge unmatched twas results by symbol and partial overlap
twas_unmatched <- twas[!twas$twas_id %in% twas_scaffolds$twas_id,]
twas_unmatched_gr <- with(twas_unmatched, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
# find overlaps
twas_unmatched_genes_overlaps <- findOverlaps(twas_unmatched_gr, genes_gr, maxgap=window)
# merge in gene names
twas_symbols <- twas_unmatched[twas_unmatched_genes_overlaps@from,]
twas_symbols$ensembl_gene_id <-  Genes$ensembl_gene_id[twas_unmatched_genes_overlaps@to]
twas_symbols$external_gene_name.37 <- Genes$external_gene_name.37[twas_unmatched_genes_overlaps@to]
# keep matches with symbols match
twas_matched_symbols <- twas_symbols[which(twas_symbols$ID == twas_symbols$external_gene_name.37),]

twas_genes <- rbind(twas_scaffolds, twas_matched_symbols)

twas_sig<-twas_genes[twas_genes$TWAS.P < 3.685926e-08,]

##########
# Colocalisation
##########

coloc<-read.csv('../../docs/tables/twas/PGC_MDD3_TWAS_colocalisation.csv')
coloc_sig<-coloc[coloc$COLOC.PP4 > 0.8,]

coloc_sig <- merge(coloc_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)

##########
# FOCUS 
##########

focus<-read.csv('../../docs/tables/twas/PGC_MDD3_TWAS.TWSig.FOCUS.results.csv')

fusion <- fread("../../docs/tables/twas/PGC_MDD3_twas_AllTissues_TWSig_CLEAN.txt")
fusion<-fusion[,c('PANEL','PANEL_clean_short','PANEL_clean'), with=F]
fusion<-fusion[!duplicated(fusion),]

focus<-merge(focus, fusion, by.x='SNP.weight.Set', by.y='PANEL_clean_short')

focus_sig<-focus[focus$FOCUS_pip > 0.5,]

focus_sig <- merge(focus_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)

##########
# Expression analysis based high confidence genes
##########

expression_highconf_res<-fread('../../docs/tables/twas/PGC3_MDD_TWAS_HighConf_results.csv')

expression_highconf_res <- merge(expression_highconf_res, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)

##########
# SMR
##########

# Read in the SMR results
smr_res<-list()

smr_res[['eQTLGen_Blood']]<-fread('../../docs/tables/twas/eqtlgen_smr_res_GW_withIDs.csv')

names(smr_res[['eQTLGen_Blood']])[names(smr_res[['eQTLGen_Blood']]) == 'GeneSymbol']<-'HGNCName'
smr_res[['eQTLGen_Blood']]$eQTL_source<-'eQTLGen_Blood'

tissue<-c("Basalganglia","Cerebellum","Cortex","Hippocampus","Spinalcord")

for(tissue_i in tissue){
  smr_res[[tissue_i]]<-fread(paste0('../../docs/tables/twas/metabrain_',tissue_i,'_smr_res_GW_withIDs.csv'))
  smr_res[[tissue_i]]$eQTL_source<-paste0('MetaBrain_',tissue_i)
}

smr_res_dat<-do.call(rbind, smr_res)
smr_res_dat$p_SMR_fdr_all<-p.adjust(smr_res_dat$p_SMR, method="fdr")

smr_res_dat_sig<-smr_res_dat[smr_res_dat$p_SMR_fdr_all < 0.05,]

##########
# HEIDI
##########

heidi<-smr_res_dat_sig[smr_res_dat_sig$p_HEIDI > 0.05,]

##########
# PWAS
##########

# For no just read in the ROSMAP results
pwas<-NULL
for(i in 1:22){
  if(i != 6){
    pwas<-rbind(pwas, fread(paste0('../../docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i)))
  } else {
    pwas<-rbind(pwas, fread(paste0('../../docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i)))
    pwas<-rbind(pwas, fread(paste0('../../docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i,'.MHC')))
  }
}

pwas$TWAS.P.FDR<-p.adjust(pwas$TWAS.P)
pwas$ID<-gsub('\\..*','', pwas$ID)

# Read in PWAS and SMR results for all significant ROSMAP PWAS assoc results
pwas_smr_rosmap_banner<-fread('../../docs/tables/pwas/rosmap_banner_pwas_smr_results.csv')

pwas_smr_rosmap_banner <- merge(pwas_smr_rosmap_banner, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)



########
# PsyOPS
########

psyops <- fread('../../docs/tables/psyops/psyops_full_eur.cojo.txt')
psyops$psy_id <- 1:nrow(psyops)
psyops_genes37 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='gene', by.y='external_gene_name.37')
psyops_genes38 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.38')], by.x='gene', by.y='external_gene_name.38')
psyops_genesen <- merge(psyops, Genes[,c('ensembl_gene_id','external_gene_name')], by.x='gene', by.y='ensembl_gene_id')
psyops_genesen$ensembl_gene_id <- psyops_genesen$gene
psyops_genesen$external_gene_name <- NULL

psyops_genes <- unique(rbind(psyops_genes37, psyops_genes38, psyops_genesen))
psyops_genes <- psyops_genes[!duplicated(psyops_genes$psy_id),]

psyops_prioritised<-NULL
for(i in unique(psyops_genes$lead_variant)){
  tmp<-psyops_genes[psyops_genes$lead_variant == i,]
  tmp<-tmp[tmp$PsyOPS_score == max(tmp$PsyOPS_score),]
  tmp<-tmp[tmp$distance == min(tmp$distance),]
  psyops_prioritised<-rbind(psyops_prioritised, tmp)
}

Create UpSet plot

This plot will show the number of genes returned by each analysis and the overlap of each analysis

Show code

# Create data.frame listing genes with T/F indicating whether it was supported by each analysis 
gene_overlap<-list()
gene_overlap[['Fine-mapping']]<-finemap_geneids
gene_overlap[['Expression']]<-expression_highconf_res$ensembl_gene_id
gene_overlap[['Protein']]<-pwas_smr_rosmap_banner$ensembl_gene_id[which(pwas_smr_rosmap_banner$banner_replicated  & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)]
gene_overlap[['Nearest Gene']]<-indep_hits$NearestENSG[!is.na(indep_hits$NearestENSG)]
gene_overlap[['fastBAT']]<-fastbat$Gene[fastbat$P.FDR < 0.05]
gene_overlap[['H-MAGMA']]<-unique(hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 0.05])
gene_overlap[['PsyOPS']]<-psyops_prioritised$ensembl_gene_id

library(UpSetR)

png('../../docs/figures/gene_consensus_upset_dense.png', units = 'px', res=300, height=1500, width=2500)

upset(fromList(gene_overlap), nsets=10, order.by = "freq")

dev.off()
## png 
##   2
gene_overlap_highconf <- gene_overlap[c('Fine-mapping','Expression', 'Protein', 'PsyOPS')]

png('../../docs/figures/gene_consensus_upset_highconf.png', units = 'px', res=300, height=1400, width=1500)

upset(fromList(gene_overlap_highconf), nsets=10, order.by = "freq")

dev.off()
## png 
##   2

Show UpSet plot of genes across all analyses

High-confidence genes

High-confidence genes

Show UpSet plot of genes across high-confidence analyses

High-confidence genes

High-confidence genes


Compare high confidence genes across all analyses

Show code

# Define high confidence associations
# - Genes implicated by finemapping
# - Genes implicated by TWAS, colocalisation and FOCUS from any panel
# - Genes implicated by PWAS, SMR, coloc and HEIDI in ROSMAP and Banner
high_conf<-unique(unlist(gene_overlap[c('Fine-mapping','Expression', 'Protein')]))
ENSGIDs <- Genes[,c('ensembl_gene_id', 'external_gene_name')]
names(ENSGIDs) <- c('ENSGID', 'ID')
high_conf_tab <- merge(data.frame(ENSGID=high_conf), ENSGIDs)

# finemap
# Joni said he used the same gene list as David Howard (fastBAT)
high_conf_tab$finemap<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% fastbat$Gene | high_conf_tab$ENSGID[i] %in% finemap_geneids){
    if(high_conf_tab$ENSGID[i] %in% finemap_geneids){
      high_conf_tab$finemap[i]<-'Sig'
    } else {
      high_conf_tab$finemap[i]<-'Non-Sig'
    }
  }
}

# expression
high_conf_tab$expression<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(!(high_conf_tab$ID[i] %in% twas$ID)){
    high_conf_tab$expression[i]<-'NA'
  } else {
    if(!(high_conf_tab$ID[i] %in% expression_highconf_res$ID)){
      high_conf_tab$expression[i]<-'Non-Sig'
    } else {
      if(expression_highconf_res$`SNP-weight Set`[expression_highconf_res$ID == high_conf_tab$ID[i]] == "CMC DLPFC Splicing"){
        high_conf_tab$expression[i]<-'Sig'
      } else {
        if(expression_highconf_res$TWAS.Z[expression_highconf_res$ID == high_conf_tab$ID[i]] > 0){
            high_conf_tab$expression[i]<-'Pos'
        } else {
            high_conf_tab$expression[i]<-'Neg'
        }
      }
    }
  }
}

# protein
high_conf_tab$protein<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(!(high_conf_tab$ENSGID[i] %in% pwas$ID)){
    high_conf_tab$protein[i]<-'NA'
  } else {
    if(!(high_conf_tab$ID[i] %in% pwas_smr_rosmap_banner$ID[which(pwas_smr_rosmap_banner$banner_replicated & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)])){
      high_conf_tab$protein[i]<-'Non-Sig'
    } else {
      if(pwas_smr_rosmap_banner$rosmap.TWAS.Z[pwas_smr_rosmap_banner$ID == high_conf_tab$ID[i]] > 0){
          high_conf_tab$protein[i]<-'Pos'
      } else {
          high_conf_tab$protein[i]<-'Neg'
      }
    }
  }
}

# fastBAT
high_conf_tab$fastBAT<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% fastbat$Gene){
    if(high_conf_tab$ENSGID[i] %in% fastbat$Gene[fastbat$P.FDR < 0.05]){
      high_conf_tab$fastBAT[i]<-'Sig'
    } else {
      high_conf_tab$fastBAT[i]<-'Non-Sig'
    }
  }
}

# hmagma
high_conf_tab$hmagma<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE){
    if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 0.05]){
      high_conf_tab$hmagma[i]<-'Sig'
    } else {
      high_conf_tab$hmagma[i]<-'Non-Sig'
    }
  }
}

# PsyOPS
high_conf_tab$psyops<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% psyops_genes$ensembl_gene_id){
    if(high_conf_tab$ENSGID[i] %in% psyops_prioritised$ensembl_gene_id){
      high_conf_tab$psyops[i]<-'Sig'
    } else {
      high_conf_tab$psyops[i]<-'Non-Sig'
    }
  }
}

# Order genes by the number of analyses indicating them as high confidence.
high_conf_tab_log<-high_conf_tab[,c(-1, -2)]
high_conf_tab_log[high_conf_tab_log == 'NA']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Sig']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Non-Sig']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Pos']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Neg']<-'T'

high_conf_tab_log<-data.frame(sapply( high_conf_tab_log, as.logical))
high_conf_tab_log[is.na(high_conf_tab_log)]<-T

high_conf_tab_log$sum<-rowSums(high_conf_tab_log[,1:3])

high_conf_tab_ordered <-high_conf_tab[order(-high_conf_tab_log$sum, high_conf_tab$ID),-1]
high_conf_tab_ordered$ID<-factor(high_conf_tab_ordered$ID, levels = high_conf_tab_ordered$ID)

names(high_conf_tab_ordered)<-c('ID','Fine-mapping','Expression','Protein','fastBAT','H-MAGMA','PsyOPS')
high_conf_tab_melt<-melt(as.data.table(high_conf_tab_ordered), id.vars='ID')
high_conf_tab_melt$value<-factor(high_conf_tab_melt$value, levels=c('Non-Sig','Sig','Pos','Neg','NA'))
high_conf_tab_melt$ID<-factor(high_conf_tab_melt$ID, levels=rev(levels(high_conf_tab_ordered$ID)))

library(ggplot2)
library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
png('../../docs/figures/gene_con_heatmap.png', units = 'px', res=300, height=14000, width=2800)
ggplot(data = high_conf_tab_melt, aes(x='1', y = ID, fill=value)) +
  theme_minimal_grid(color = "white") +
  geom_tile(colour = 'black', width=1.5) +
  scale_fill_manual(values=c('#FFFFFF','#33FF33','#FF6666','#3399FF','#CCCCCC'), drop=F) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
    labs(x ='', y = "", title='', fill='') +
  facet_grid(~ variable) +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        strip.background = element_rect(color="black", fill="grey95", size=0.1, linetype="solid"))
dev.off()
## png 
##   2

Show heatmap of high confidence associations

High-confidence genes

High-confidence genes

  • Significant genes in from expression based analyses that are based on splicing data are indicated as green as direction of effect is challenging to interpret.

Compare high confidence genes across expression/protein panels and methods

This will give some indication of how fine-mapped variants may affect gene expression and protein levels, and may also give clarity for associations that have a different direction of effect in the TWAS and PWAS (which is still the case for the GOPC gene).

Show code

###
# Create a dataframe containing gene ID, PANEL, Method and Z scores for all expression and protein analyses.
###
all_func_res<-NULL

# TWAS
twas_tmp<-twas[,c('PANEL','ID','TWAS.Z'), with=F]
twas_tmp$Sig<-twas$TWAS.P < 3.685926e-08
twas_tmp$Coloc<-twas$COLOC.PP4 > 0.8
names(twas_tmp)<-c('Panel','ID','Z','Sig','Coloc')
twas_tmp$Method<-'FUSION'
twas_tmp$Type<-'Expr.'
twas_tmp$Type[grepl('SPLIC',twas_tmp$Panel)]<-'Splice'
# Retain only the most significant assoc for each gene within PANEL (only relevent for splice panel)
twas_tmp<-twas_tmp[order(-abs(twas_tmp$Z)),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$Tissue<-NA
twas_tmp$Tissue[!grepl('Adrenal|BLOOD|Blood|Thyroid',twas_tmp$Panel)]<-'Brain'
twas_tmp$Tissue[grepl('BLOOD|Blood',twas_tmp$Panel)]<-'Blood'
twas_tmp$Tissue[grepl('Adrenal|Thyroid',twas_tmp$Panel)]<-'HPA/HPT'

twas_tmp<-merge(twas_tmp, focus_sig[,c('ID','PANEL','FOCUS_pip')], by.x=c('Panel','ID'), by.y=c('PANEL','ID'), all.x=T)
twas_tmp$FOCUS[twas_tmp$FOCUS_pip > 0.5]<-T
twas_tmp$FOCUS[twas_tmp$FOCUS_pip < 0.5 | is.na(twas_tmp$FOCUS_pip)]<-F
twas_tmp<-twas_tmp[order(-twas_tmp$FOCUS_pip),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$FOCUS_pip<-NULL

all_func_res<-rbind(all_func_res, twas_tmp)

# SMR expression
smr_res_dat$Z<-smr_res_dat$b_SMR/smr_res_dat$se_SMR
smr_res_dat$Sig<-smr_res_dat$p_SMR_fdr_all < 0.05
smr_res_dat$Coloc<-smr_res_dat$p_HEIDI > 0.05
smr_tmp<-smr_res_dat[,c('eQTL_source','HGNCName','Z','Sig','Coloc'), with=F]
names(smr_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_tmp$Method<-'SMR'
smr_tmp$Type<-'Expr.'
smr_tmp$Tissue<-NA
smr_tmp$Tissue[!grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Brain'
smr_tmp$Tissue[grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Blood'
smr_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, smr_tmp)

# PWAS
pwas_smr_rosmap_banner$rosmap.sig<-T
pwas_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
pwas_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
pwas_rosmap_tmp<-pwas_rosmap_tmp[,c('PANEL','ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
names(pwas_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_rosmap_tmp<-pwas_rosmap_tmp[order(-abs(pwas_rosmap_tmp$Z)),]
pwas_rosmap_tmp<-pwas_rosmap_tmp[!duplicated(paste0(pwas_rosmap_tmp$Panel, pwas_rosmap_tmp$ID)),]
pwas_rosmap_tmp$Method<-'FUSION'
pwas_rosmap_tmp$Type<-'Protein'
pwas_rosmap_tmp$Tissue<-'Brain'
pwas_rosmap_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, pwas_rosmap_tmp)

pwas_banner_tmp<-pwas_smr_rosmap_banner[,c('ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
pwas_banner_tmp$PANEL<-'Banner et al. DLPFC'
pwas_banner_tmp<-pwas_banner_tmp[,c('PANEL','ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
names(pwas_banner_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_banner_tmp<-pwas_banner_tmp[order(-abs(pwas_banner_tmp$Z)),]
pwas_banner_tmp<-pwas_banner_tmp[!duplicated(paste0(pwas_banner_tmp$Panel, pwas_banner_tmp$ID)),]
pwas_banner_tmp$Method<-'FUSION'
pwas_banner_tmp$Type<-'Protein'
pwas_banner_tmp$Tissue<-'Brain'
pwas_banner_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, pwas_banner_tmp)

# SMR protein
pwas_smr_rosmap_banner$z_SMR<-abs(qnorm(pwas_smr_rosmap_banner$p_SMR/2))
pwas_smr_rosmap_banner$z_SMR<-sign(pwas_smr_rosmap_banner$b_SMR)*pwas_smr_rosmap_banner$z_SMR

smr_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','z_SMR','smr_replicated','smr.causal'), with=F]
smr_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
smr_rosmap_tmp<-smr_rosmap_tmp[,c('PANEL','ID','z_SMR','smr_replicated','smr.causal'), with=F]
names(smr_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_rosmap_tmp<-smr_rosmap_tmp[order(-abs(smr_rosmap_tmp$Z)),]
smr_rosmap_tmp<-smr_rosmap_tmp[!duplicated(paste0(smr_rosmap_tmp$Panel, smr_rosmap_tmp$ID)),]
smr_rosmap_tmp$Method<-'SMR'
smr_rosmap_tmp$Type<-'Protein'
smr_rosmap_tmp$Tissue<-'Brain'
smr_rosmap_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, smr_rosmap_tmp)

# Subset to high confidence genes
high_conf_id<-Genes$external_gene_name[Genes$ensembl_gene_id %in% high_conf]
all_func_res<-all_func_res[all_func_res$ID %in% high_conf_id,]

# Remove missing values
all_func_res<-all_func_res[complete.cases(all_func_res),]

all_func_res$Group<-paste0(all_func_res$Tissue,'\n',all_func_res$Method,'\n',all_func_res$Type )
all_func_res$Group<-factor(all_func_res$Group, levels=c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr."))

all_func_res$ID<-factor(all_func_res$ID, levels=rev(levels(high_conf_tab_ordered$ID)))

all_func_res$Panel[all_func_res$Panel == "Adrenal_Gland"]<-'GTeX Adrenal Gland'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellar_Hemisphere"]<-'GTeX Cerebellar Hemisphere'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellum"]<-'GTeX Cerebellum'
all_func_res$Panel[all_func_res$Panel == "Brain_Hypothalamus"]<-'GTeX Hypothalamus'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ_SPLICING"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "Pituitary"]<-'GTeX Pituitary'
all_func_res$Panel[all_func_res$Panel == "PsychENCODE"]<-'PsychENCODE DLPFC'
all_func_res$Panel[all_func_res$Panel == "Whole_Blood"]<-'GTeX'
all_func_res$Panel[all_func_res$Panel == "NTR.BLOOD.RNAARR"]<-'NTR'
all_func_res$Panel[all_func_res$Panel == "Thyroid"]<-'GTeX Thyroid'
all_func_res$Panel[all_func_res$Panel == "Brain_Caudate_basal_ganglia"]<-'GTeX Caudate basalganglia'
all_func_res$Panel[all_func_res$Panel == "YFS.BLOOD.RNAARR"]<-'YFS'
all_func_res$Panel[all_func_res$Panel == "Brain_Cortex"]<-'GTeX Cortex'
all_func_res$Panel[all_func_res$Panel == "Brain_Frontal_Cortex_BA9"]<-'GTeX Frontal Cortex BA9'
all_func_res$Panel[all_func_res$Panel == "Brain_Hippocampus"]<-'GTeX Hippocampus'
all_func_res$Panel[all_func_res$Panel == "Brain_Amygdala"]<-'GTeX Amygdala'
all_func_res$Panel[all_func_res$Panel == "Brain_Anterior_cingulate_cortex_BA24"]<-'GTeX Anterior cingulate cortex BA24'
all_func_res$Panel[all_func_res$Panel == "Brain_Substantia_nigra"]<-'GTeX Substantia nigra'
all_func_res$Panel[all_func_res$Panel == "Brain_Nucleus_accumbens_basal_ganglia"]<-'GTeX Nucleus accumbens basalganglia'
all_func_res$Panel[all_func_res$Panel == "Brain_Putamen_basal_ganglia"]<-'GTeX Nucleus Putamen basalganglia'
all_func_res$Panel[all_func_res$Panel == "eQTLGen_Blood"]<-'eQTLGen'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cerebellum"]<-'MetaBrain Cerebellum'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Basalganglia"]<-'MetaBrain Basalganglia'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cortex"]<-'MetaBrain Cortex'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Hippocampus"]<-'MetaBrain Hippocampus'

all_func_res$Panel<-factor(all_func_res$Panel, levels=c("GTeX Adrenal Gland" ,"GTeX Amygdala" ,"GTeX Anterior cingulate cortex BA24" ,"GTeX Caudate basalganglia" ,"GTeX Cerebellar Hemisphere" ,"GTeX Cerebellum" ,"GTeX Cortex" ,"GTeX Frontal Cortex BA9" ,"GTeX Hippocampus" ,"GTeX Hypothalamus", "GTeX Nucleus accumbens basalganglia","GTeX Nucleus Putamen basalganglia" ,"GTeX Pituitary",'GTeX Substantia nigra' ,"GTeX Thyroid","CMC DLPFC", "PsychENCODE DLPFC", "GTeX" ,"NTR" ,"YFS", "eQTLGen" ,'MetaBrain Basalganglia',"MetaBrain Cerebellum" ,"MetaBrain Cortex" , 'MetaBrain Hippocampus',"ROSMAP DLPFC" ,"Banner et al. DLPFC"))


# Create heatmap
library(ggplot2)

heatmap<-ggplot(data = all_func_res, aes(x = Panel, y = ID)) +
    theme_bw()  +
    geom_tile(aes(fill = Z), width=0.95, height=0.95) +
  geom_tile(data=all_func_res[all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='black', fill=NA, size=0.3, width=0.95, height=0.95) +
  geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='green2', fill=NA, size=0.3, width=0.95, height=0.95) +
  geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T & all_func_res$FOCUS == T,], aes(x = Panel, y = ID), colour='magenta2', fill=NA, size=0.3, width=0.95, height=0.95) +
    scale_fill_gradientn(colours=c("dodgerblue2","white","red"), na.value = 'white',name = "Z-score") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
    geom_text(aes(label=round(Z,1)), color="black", size=3) +
    labs(title="High confidence genes across panels and methods") +
  facet_wrap(~ Group , nrow=1, scales = "free_x")

group_siz<-NULL
for(i in c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr.")){
  group_siz<-rbind(group_siz, data.frame(Group=i,
                           Size=length(unique(all_func_res$Panel[all_func_res$Group==i]))))
}

# Set minimum size to 3 to allow space for labels
group_siz$Size[group_siz$Size < 2]<-2
group_siz$Prop<-group_siz$Size/sum(group_siz$Size)
group_siz$Width<-4*group_siz$Prop

library(grid)
gt = ggplot_gtable(ggplot_build(heatmap))

for(i in 1:nrow(group_siz)){
  gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]] = group_siz$Width[i]*gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]]

}

png('../../docs/figures/gene_con_func_heatmap.png', units = 'px', res=300, height=8000, width=3200)

grid.draw(gt)

a<-dev.off()

Show heatmap of high confidence associations across expression and protein datasets and methods

High-confidence genes

High-confidence genes

  • Black box indicates significance
  • Green box indicates colocalisation
  • Magenta box indicates FOCUS PIP > 0.5
  • Some MetaBrain Basalganglia, Hippocampus, Spinalcord panels are not present due to not containing any high confidence genes
  • Banner PWAS and ROSMAP SMR results only shown for genes that were signficant in the discovery ROSMAP PWAS.

Query GO terms using PANTHER

Conduct an overrepresentation test with PANTHER using the PANTHER API. The service returns results as JSON. The first step is querying the IDs for humans (the taxon) and the annotation datasets.

Show code

library(httr)
library(jsonlite)

# the PantherDB URL
panther_db <- "http://pantherdb.org"

# Get list of taxon IDs
supportedgenomes_response <- GET(modify_url(panther_db, path="/services/oai/pantherdb/supportedgenomes"))

# find the taxon ID for humans
supportedgenomes <- fromJSON(content(supportedgenomes_response, "text"))
genomes <- supportedgenomes$search$output$genomes$genome
human_taxon_id <- genomes$taxon_id[which(genomes$name == 'human')]

# get list of annotation datasets
supportedannotdatasets_response <- GET(modify_url(panther_db, path="services/oai/pantherdb/supportedannotdatasets"))

# find GO ids for biological process, molecular function, and cellular components
supportedannotdatasets <- fromJSON(content(supportedannotdatasets_response, "text"))
annotation_data_types <- supportedannotdatasets$search$annotation_data_sets$annotation_data_type
biological_process_id = annotation_data_types$id[which(annotation_data_types$label == "biological_process")]
cellular_component_id = annotation_data_types$id[which(annotation_data_types$label == "cellular_component")]
molecular_function_id = annotation_data_types$id[which(annotation_data_types$label == "molecular_function")]

The next step is to query the overrepresentation test.

Show code

# construct enrichment overrepresentation query from gene list
# and annotation ID
overrep_url <- function(gene_list, annot_data_set_id, url=panther_db, organism_id=human_taxon_id) {
    modify_url(url,
        path="/services/oai/pantherdb/enrich/overrep",
        query=list(geneInputList=paste(gene_list, collapse=","),
                   organism=organism_id,
                   annotDataSet=annot_data_set_id,
                   enrichmentTestType="FISHER",
                   correction="FDR"))
}

# construct URL and query PANTHER. Parse out JSON response
overrep_query <- function(genes, annot_data_set_id, ...) {
    # make query
    overrep_response <- GET(overrep_url(genes, annot_data_set_id, ...))
    # parse JSON
    overrep <- fromJSON(content(overrep_response, "text"))
    return(overrep)
}

high_conf_overrep_biol <- overrep_query(high_conf, biological_process_id)

high_conf_overrep_mol <- overrep_query(high_conf, molecular_function_id)

high_conf_overrep_cell<- overrep_query(high_conf, cellular_component_id)

Biological processes

Show code

# extract results table from the query
panther_format <- function(query) {
    results <- query$results$result
    results_labels <- results$term
    results_terms <- cbind(results_labels,
                           results[,c('fold_enrichment', 'fdr',
                                      'number_in_list', 'expected', 
                                      'number_in_reference', 'pValue')])
                                      
    return(results_terms)
}

high_conf_overrep_biol_results <- panther_format(high_conf_overrep_biol)
high_conf_overrep_mol_results <- panther_format(high_conf_overrep_mol)
high_conf_overrep_cell_results <- panther_format(high_conf_overrep_cell)

Show Go: Biological Processes table

# filter for FDR
knitr::kable(high_conf_overrep_biol_results[which(high_conf_overrep_biol_results$fdr <= 0.05),], caption='GO: Biological Processes')
GO: Biological Processes
id label fold_enrichment fdr number_in_list expected number_in_reference pValue
GO:0007399 nervous system development 2.812943 0.0000000 58 20.6189713 2081 0.0000000
GO:0007275 multicellular organism development 1.931022 0.0000090 79 40.9109719 4129 0.0000000
GO:0048731 system development 1.981079 0.0000128 73 36.8486085 3719 0.0000000
GO:0048856 anatomical structure development 1.745994 0.0000640 87 49.8283549 5029 0.0000000
GO:0006928 movement of cell or subcellular component 2.659549 0.0000794 39 14.6641410 1480 0.0000000
GO:0032502 developmental process 1.669706 0.0001268 92 55.0995192 5561 0.0000000
GO:0032501 multicellular organismal process 1.559432 0.0004556 100 64.1258925 6472 0.0000002
GO:0022008 neurogenesis 2.723282 0.0004159 33 12.1177328 1223 0.0000002
GO:0065008 regulation of biological quality 1.812888 0.0010111 67 36.9575987 3730 0.0000006
GO:0007417 central nervous system development 2.825941 0.0015375 28 9.9082034 1000 0.0000010
GO:0048699 generation of neurons 2.707556 0.0020268 29 10.7107679 1081 0.0000014
GO:0120035 regulation of plasma membrane bounded cell projection organization 3.402016 0.0020683 21 6.1728107 623 0.0000016
GO:0099537 trans-synaptic signaling 4.085119 0.0019212 17 4.1614454 420 0.0000016
GO:0050807 regulation of synapse organization 5.794821 0.0022467 12 2.0708145 209 0.0000020
GO:0031344 regulation of cell projection organization 3.316832 0.0024349 21 6.3313420 639 0.0000023
GO:0050803 regulation of synapse structure or activity 5.633105 0.0025969 12 2.1302637 215 0.0000027
GO:0099536 synaptic signaling 3.812778 0.0035837 17 4.4586915 450 0.0000039
GO:0032879 regulation of localization 1.900136 0.0037639 52 27.3664578 2762 0.0000043
GO:0048812 neuron projection morphogenesis 3.713745 0.0044858 17 4.5775900 462 0.0000054
GO:0009653 anatomical structure morphogenesis 2.032387 0.0046715 44 21.6494244 2185 0.0000060
GO:0120039 plasma membrane bounded cell projection morphogenesis 3.681867 0.0045280 17 4.6172228 466 0.0000061
GO:0000902 cell morphogenesis 3.089586 0.0048074 21 6.7970275 686 0.0000067
GO:0048858 cell projection morphogenesis 3.650532 0.0046069 17 4.6568556 470 0.0000068
GO:0007420 brain development 2.933134 0.0058272 22 7.5005100 757 0.0000089
GO:0010975 regulation of neuron projection development 3.738017 0.0060076 16 4.2803439 432 0.0000096
GO:0032990 cell part morphogenesis 3.508691 0.0067072 17 4.8451115 489 0.0000111
GO:0031346 positive regulation of cell projection organization 4.155796 0.0065795 14 3.3687892 340 0.0000113
GO:0050808 synapse organization 4.462735 0.0063869 13 2.9130118 294 0.0000114
GO:0030182 neuron differentiation 2.577690 0.0066211 26 10.0865511 1018 0.0000123
GO:0048519 negative regulation of biological process 1.545961 0.0064905 81 52.3945796 5288 0.0000124
GO:0034765 regulation of ion transmembrane transport 3.466162 0.0065496 17 4.9045607 495 0.0000130
GO:0007416 synapse assembly 7.838949 0.0068179 8 1.0205450 103 0.0000139
GO:0007268 chemical synaptic transmission 3.765913 0.0080043 15 3.9830978 402 0.0000169
GO:0098916 anterograde trans-synaptic signaling 3.765913 0.0077689 15 3.9830978 402 0.0000169
GO:0098742 cell-cell adhesion via plasma-membrane adhesion molecules 4.587567 0.0086027 12 2.6157657 264 0.0000192
GO:0050890 cognition 4.232400 0.0084954 13 3.0715431 310 0.0000195
GO:0060322 head development 2.761670 0.0093246 22 7.9661955 804 0.0000220
GO:0040011 locomotion 2.352787 0.0091546 29 12.3258050 1244 0.0000222
GO:0043269 regulation of ion transport 2.938180 0.0091558 20 6.8069357 687 0.0000228
GO:0048513 animal organ development 1.746133 0.0098012 55 31.4981786 3179 0.0000250
GO:0034762 regulation of transmembrane transport 3.143039 0.0097187 18 5.7269416 578 0.0000254
GO:0007610 behavior 3.121437 0.0103579 18 5.7665744 582 0.0000278
GO:0010976 positive regulation of neuron projection development 6.055588 0.0102893 9 1.4862305 150 0.0000282
GO:0050804 modulation of chemical synaptic transmission 3.570512 0.0108893 15 4.2010782 424 0.0000306
GO:0048523 negative regulation of cellular process 1.555267 0.0107186 75 48.2232260 4867 0.0000308
GO:0099177 regulation of trans-synaptic signaling 3.562111 0.0106914 15 4.2109864 425 0.0000314
GO:0098609 cell-cell adhesion 3.177315 0.0125648 17 5.3504298 540 0.0000377
GO:0032409 regulation of transporter activity 4.133507 0.0166311 12 2.9031036 293 0.0000509
GO:0001764 neuron migration 6.408030 0.0173416 8 1.2484336 126 0.0000542
GO:0051252 regulation of RNA metabolic process 1.640425 0.0170847 61 37.1854874 3753 0.0000545
GO:0007156 homophilic cell adhesion via plasma membrane adhesion molecules 5.406775 0.0200490 9 1.6645782 168 0.0000652
GO:1900244 positive regulation of synaptic vesicle endocytosis 50.463235 0.0225552 3 0.0594492 6 0.0000748
GO:0048666 neuron development 2.587858 0.0249534 21 8.1148186 819 0.0000844
GO:0010468 regulation of gene expression 1.524443 0.0252772 73 47.8863471 4833 0.0000871
GO:0019219 regulation of nucleobase-containing compound metabolic process 1.591351 0.0255573 64 40.2173976 4059 0.0000897
GO:0032989 cellular component morphogenesis 2.948024 0.0257494 17 5.7665744 582 0.0000920
GO:0032412 regulation of ion transmembrane transporter activity 4.173651 0.0265634 11 2.6355821 266 0.0000966
GO:0007611 learning or memory 4.158019 0.0269512 11 2.6454903 267 0.0000997
GO:2001257 regulation of cation channel activity 5.046323 0.0286563 9 1.7834766 180 0.0001079
GO:0048667 cell morphogenesis involved in neuron differentiation 3.348272 0.0284968 14 4.1812618 422 0.0001091
GO:0050793 regulation of developmental process 1.802990 0.0323324 44 24.4039050 2463 0.0001258
GO:0051179 localization 1.487379 0.0318546 76 51.0966050 5157 0.0001260
GO:0022898 regulation of transmembrane transporter activity 4.022432 0.0328453 11 2.7346641 276 0.0001320
GO:0008088 axo-dendritic transport 8.183227 0.0333436 6 0.7332071 74 0.0001362
GO:0034330 cell junction organization 3.108618 0.0331094 15 4.8252951 487 0.0001373
GO:1903423 positive regulation of synaptic vesicle recycling 37.847426 0.0344040 3 0.0792656 8 0.0001449
GO:0008150 biological_process 1.096351 0.0356215 194 176.9506047 17859 0.0001523
NA UNCLASSIFIED 0.369694 0.0350976 10 27.0493953 2730 0.0001523
GO:0007612 learning 5.382745 0.0389315 8 1.4862305 150 0.0001714
GO:0051049 regulation of transport 1.969862 0.0448064 34 17.2600903 1742 0.0002001
GO:2000311 regulation of AMPA receptor activity 15.527149 0.0445554 4 0.2576133 26 0.0002019
GO:0030154 cell differentiation 1.624036 0.0451396 55 33.8662393 3418 0.0002074
GO:0048869 cellular developmental process 1.613650 0.0480978 55 34.0842197 3440 0.0002240

Show Go: Molecular functions table

# filter for FDR
knitr::kable(high_conf_overrep_mol_results[which(high_conf_overrep_mol_results$fdr <= 0.05),], caption='GO: Molecular Functions')
GO: Molecular Functions
id label fold_enrichment fdr number_in_list expected number_in_reference pValue

Show Go: Cellular Components table

# filter for FDR
knitr::kable(high_conf_overrep_cell_results[which(high_conf_overrep_cell_results$fdr <= 0.05),], caption='GO: Cellular Components')
GO: Cellular Components
id label fold_enrichment fdr number_in_list expected number_in_reference pValue
GO:0043005 neuron projection 3.694234 0.0000000 50 13.5346059 1366 0.0000000
GO:0045202 synapse 3.607201 0.0000000 48 13.3067172 1343 0.0000000
GO:0036477 somatodendritic compartment 4.180386 0.0000000 35 8.3724319 845 0.0000000
GO:0030425 dendrite 4.891428 0.0000000 30 6.1331779 619 0.0000000
GO:0097447 dendritic tree 4.875675 0.0000000 30 6.1529943 621 0.0000000
GO:0098794 postsynapse 4.557970 0.0000000 28 6.1430861 620 0.0000000
GO:0030054 cell junction 2.575628 0.0000000 54 20.9657584 2116 0.0000000
GO:0030424 axon 4.429375 0.0000000 28 6.3214338 638 0.0000000
GO:0120025 plasma membrane bounded cell projection 2.479212 0.0000001 55 22.1844674 2239 0.0000000
GO:0042995 cell projection 2.409157 0.0000001 56 23.2446452 2346 0.0000000
GO:0098984 neuron to neuron synapse 5.447736 0.0000009 19 3.4876876 352 0.0000000
GO:0030426 growth cone 7.951783 0.0000040 13 1.6348536 165 0.0000000
GO:0030427 site of polarized growth 7.672773 0.0000055 13 1.6943028 171 0.0000000
GO:0014069 postsynaptic density 5.328416 0.0000066 17 3.1904415 322 0.0000000
GO:0032279 asymmetric synapse 5.246942 0.0000076 17 3.2399825 327 0.0000001
GO:0098793 presynapse 4.099528 0.0000107 21 5.1225412 517 0.0000001
GO:0099572 postsynaptic specialization 4.973188 0.0000139 17 3.4183302 345 0.0000001
GO:0098978 glutamatergic synapse 4.893405 0.0000385 16 3.2697071 330 0.0000003
GO:0150034 distal axon 5.119459 0.0001219 14 2.7346641 276 0.0000011
GO:0097060 synaptic membrane 4.260748 0.0001981 16 3.7552091 379 0.0000019
GO:0043197 dendritic spine 6.417290 0.0002074 11 1.7141192 173 0.0000021
GO:0044309 neuron spine 6.343950 0.0002199 11 1.7339356 175 0.0000024
GO:0031224 intrinsic component of membrane 1.524313 0.0005091 90 59.0429841 5959 0.0000057
GO:0045211 postsynaptic membrane 4.771070 0.0004890 13 2.7247559 275 0.0000058
GO:0098839 postsynaptic density membrane 8.872657 0.0004872 8 0.9016465 91 0.0000060
GO:0099240 intrinsic component of synaptic membrane 6.154053 0.0006943 10 1.6249454 164 0.0000088
GO:0099699 integral component of synaptic membrane 5.975909 0.0023552 9 1.5060469 152 0.0000312
GO:0099634 postsynaptic specialization membrane 6.842473 0.0025466 8 1.1691680 118 0.0000349
GO:0043025 neuronal cell body 3.288846 0.0030381 16 4.8649279 491 0.0000432
GO:0098797 plasma membrane protein complex 2.791880 0.0031135 20 7.1636311 723 0.0000458
GO:0016021 integral component of membrane 1.461694 0.0055241 84 57.4675798 5800 0.0000839
GO:0005891 voltage-gated calcium channel complex 11.468917 0.0074120 5 0.4359610 44 0.0001162
GO:0044295 axonal growth cone 16.821078 0.0094715 4 0.2377969 24 0.0001531
GO:0098590 plasma membrane region 2.188767 0.0098278 27 12.3357132 1245 0.0001637
GO:0044297 cell body 2.888772 0.0108281 16 5.5386857 559 0.0001857
GO:0099061 integral component of postsynaptic density membrane 9.704468 0.0135772 5 0.5152266 52 0.0002395
GO:0099055 integral component of postsynaptic membrane 5.936851 0.0137258 7 1.1790762 119 0.0002488
GO:0034702 ion channel complex 3.713014 0.0138248 11 2.9625528 299 0.0002574
GO:0099146 intrinsic component of postsynaptic density membrane 9.011292 0.0172514 5 0.5548594 56 0.0003296
GO:0098936 intrinsic component of postsynaptic membrane 5.651882 0.0168688 7 1.2385254 125 0.0003306
GO:0120111 neuron projection cytoplasm 6.728431 0.0183688 6 0.8917383 90 0.0003690
GO:0005798 Golgi-associated vesicle 6.582161 0.0200335 6 0.9115547 92 0.0004123
GO:0034703 cation channel complex 4.091614 0.0227101 9 2.1996212 222 0.0004785
GO:0005886 plasma membrane 1.389752 0.0286206 82 59.0033513 5955 0.0006170
GO:0043198 dendritic shaft 11.214052 0.0282800 4 0.3566953 36 0.0006235
GO:0000785 chromatin 2.059724 0.0279234 26 12.6230511 1274 0.0006293
GO:0005794 Golgi apparatus 1.892753 0.0288595 31 16.3782602 1653 0.0006646
GO:0034704 calcium channel complex 7.209034 0.0364030 5 0.6935742 70 0.0008561
GO:0071944 cell periphery 1.348613 0.0451782 86 63.7691971 6436 0.0010846
GO:0098982 GABA-ergic synapse 6.728431 0.0467931 5 0.7431153 75 0.0011463